{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "df33c614-2ecc-47a0-8600-bc891681997f",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "## Welcome to the Qualification Tool for the RAPIDS Accelerator for Apache Spark\n",
    "\n",
    "To run the qualification tool, enter the log path that represents the DBFS location of your Spark CPU event logs. Then, select \"Run all\" to execute the notebook. Once the notebook completes, various output tables will appear below. For more options on running the qualification tool, please refer to the [Qualification Tool User Guide](https://docs.nvidia.com/spark-rapids/user-guide/latest/qualification/quickstart.html#running-the-tool).\n",
    "\n",
    "### Note\n",
    "- Currently, local, S3 or DBFS event log paths are supported.\n",
    "- S3 path is only supported on Databricks AWS using [instance profiles](https://docs.databricks.com/en/connect/storage/tutorial-s3-instance-profile.html).\n",
    "- Eventlog path must follow the formats `/dbfs/path/to/eventlog` or `dbfs:/path/to/eventlog` for logs stored in DBFS.\n",
    "- Use wildcards for nested lookup of eventlogs. \n",
    "   - For example: `/dbfs/path/to/clusterlogs/*/*`\n",
    "- Multiple event logs must be comma-separated. \n",
    "   - For example: `/dbfs/path/to/eventlog1,/dbfs/path/to/eventlog2`\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "5e9f5796-46ed-49ac-9d08-c8b98a87c39d",
     "showTitle": true,
     "title": "Set Tools Version"
    },
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "# Pinned release of the spark-rapids-user-tools package to install.\n",
    "TOOLS_VER = \"25.04.0\"\n",
    "print(\"Using Tools Version: \" + TOOLS_VER)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "313ee58b-61b3-4010-9d60-d21eceea796c",
     "showTitle": true,
     "title": "Install Package"
    }
   },
   "outputs": [],
   "source": [
    "%pip install spark-rapids-user-tools==$TOOLS_VER > /dev/null"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "acf401a3-12d3-4236-a6c5-8fe8990b153a",
     "showTitle": true,
     "title": "Environment Setup"
    },
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "\n",
    "\n",
    "def convert_dbfs_path(path):\n",
    "    \"\"\"Rewrite ``dbfs:/...`` URIs as local ``/dbfs/...`` paths.\n",
    "\n",
    "    ``path`` may hold several comma-separated entries. Each entry is trimmed of\n",
    "    surrounding whitespace and converted on its own. Only a leading ``dbfs:/``\n",
    "    prefix is rewritten; every other entry (``/dbfs/...``, ``s3://...``, local\n",
    "    paths, wildcards) is returned unchanged.\n",
    "\n",
    "    Returns the converted entries joined back together with commas.\n",
    "    \"\"\"\n",
    "    prefix = \"dbfs:/\"\n",
    "\n",
    "    def _convert_entry(entry):\n",
    "        entry = entry.strip()\n",
    "        if entry.startswith(prefix):\n",
    "            # Drop extra slashes so that dbfs:///a and dbfs:/a both become /dbfs/a.\n",
    "            return \"/dbfs/\" + entry[len(prefix):].lstrip(\"/\")\n",
    "        return entry\n",
    "\n",
    "    return \",\".join(_convert_entry(entry) for entry in path.split(\",\"))\n",
    "  \n",
    "# Detect cloud provider from cluster usage tags; default to the first supported provider.\n",
    "valid_csps = [\"aws\", \"azure\"]\n",
    "CSP = spark.conf.get(\"spark.databricks.clusterUsageTags.cloudProvider\", \"\").lower()\n",
    "if CSP not in valid_csps:\n",
    "    print(f\"ERROR: Cannot detect cloud provider from cluster usage tags. Using '{valid_csps[0]}' as default. \")\n",
    "    CSP = valid_csps[0]\n",
    "else:\n",
    "    print(f\"Detected Cloud Provider from Spark Configs: '{CSP}'\")\n",
    "\n",
    "# Initialize variables from widgets\n",
    "dbutils.widgets.text(\"Eventlog Path\", \"/dbfs/user1/qualification_logs\")\n",
    "EVENTLOG_PATH = convert_dbfs_path(dbutils.widgets.get(\"Eventlog Path\"))\n",
    "\n",
    "dbutils.widgets.text(\"Output Path\", \"/tmp\")\n",
    "# The output path is used as a local directory (os.path.join, shell redirection),\n",
    "# so a dbfs:/ URI is rewritten the same way as the event log path.\n",
    "OUTPUT_PATH = convert_dbfs_path(dbutils.widgets.get(\"Output Path\"))\n",
    "# Create the directory up front: the console log files are written into it by shell\n",
    "# redirection, which fails when the directory does not exist. Skipped for an empty\n",
    "# value or a URI with a scheme, which is not a local directory.\n",
    "if OUTPUT_PATH and \"://\" not in OUTPUT_PATH:\n",
    "    os.makedirs(OUTPUT_PATH, exist_ok=True)\n",
    "\n",
    "# Setup environment variables (read by the %sh cells)\n",
    "os.environ[\"CSP\"] = CSP\n",
    "os.environ[\"EVENTLOG_PATH\"] = EVENTLOG_PATH\n",
    "os.environ[\"OUTPUT_PATH\"] = OUTPUT_PATH\n",
    "\n",
    "# Setup console output file\n",
    "CONSOLE_OUTPUT_PATH = os.path.join(OUTPUT_PATH, 'console_output.log')\n",
    "CONSOLE_ERROR_PATH = os.path.join(OUTPUT_PATH, 'console_error.log')\n",
    "os.environ['CONSOLE_OUTPUT_PATH'] = CONSOLE_OUTPUT_PATH\n",
    "os.environ['CONSOLE_ERROR_PATH'] = CONSOLE_ERROR_PATH\n",
    "print(f'Console output will be stored at {CONSOLE_OUTPUT_PATH} and errors will be stored at {CONSOLE_ERROR_PATH}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "693b5ee0-7500-43f3-b3e2-717fd5468aa8",
     "showTitle": true,
     "title": "Run Qualification Tool"
    }
   },
   "outputs": [],
   "source": [
    "%sh\n",
    "# Run the qualification tool on the event logs; stdout and stderr go to separate files.\n",
    "spark_rapids qualification \\\n",
    "  --platform databricks-$CSP \\\n",
    "  --eventlogs \"$EVENTLOG_PATH\" \\\n",
    "  -o \"$OUTPUT_PATH\" \\\n",
    "  --verbose \\\n",
    "  > \"$CONSOLE_OUTPUT_PATH\" 2> \"$CONSOLE_ERROR_PATH\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "f83af6c8-5a79-4a46-965b-38a4cb621877",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "## Console Output\n",
    "Console output shows the top candidates and their estimated GPU speedup.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "c61527b7-a21a-492c-bab8-77f83dc5cabf",
     "showTitle": true,
     "title": "Show Console Output"
    },
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "%sh\n",
    "# Quote the path so an output directory containing spaces or glob characters is read as one file.\n",
    "cat \"$CONSOLE_OUTPUT_PATH\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "f3c68b28-fc62-40ae-8528-799f3fc7507e",
     "showTitle": true,
     "title": "Show Logs"
    },
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "%sh\n",
    "# Quote the path so an output directory containing spaces or glob characters is read as one file.\n",
    "cat \"$CONSOLE_ERROR_PATH\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "05f96ca1-1b08-494c-a12b-7e6cc3dcc546",
     "showTitle": true,
     "title": "Parse Output"
    },
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "import re\n",
    "import shutil\n",
    "import os\n",
    "\n",
    "def extract_file_info(console_output_path, output_base_path):\n",
    "    \"\"\"Locate the tool's output folder and log file from the captured console output.\n",
    "\n",
    "    Reads `console_output_path`, takes the first `Location: <path>` entry as the log\n",
    "    file path, and derives the output folder name from the `qual_...` stem of that\n",
    "    `.log` file name.\n",
    "\n",
    "    Returns:\n",
    "        (output_folder, log_file_location), where output_folder is the folder name\n",
    "        joined onto `output_base_path`.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: if the file cannot be read or either pattern is not found.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        with open(console_output_path, 'r') as console_file:\n",
    "            console_text = console_file.read()\n",
    "\n",
    "        # Path of the log file as reported in the console output.\n",
    "        found_location = re.search(r\"Location: (.+)\", console_text)\n",
    "        if found_location is None:\n",
    "            raise ValueError(\"Log file location not found in the provided text.\")\n",
    "        log_file_location = found_location.group(1)\n",
    "\n",
    "        # The output folder shares its name with the log file, minus the .log suffix.\n",
    "        found_folder = re.search(r\"qual_[^/]+(?=\\.log)\", log_file_location)\n",
    "        if found_folder is None:\n",
    "            raise ValueError(\"Output folder not found in the log file location.\")\n",
    "\n",
    "        return os.path.join(output_base_path, found_folder.group(0)), log_file_location\n",
    "    except Exception as e:\n",
    "        raise RuntimeError(f\"Cannot parse console output. Reason: {e}\")\n",
    "\n",
    "def copy_logs(destination_folder, *log_files):\n",
    "    \"\"\"Copy the given log files into `<destination_folder>/logs`.\n",
    "\n",
    "    The `logs` folder is created if needed. A log file that does not exist is\n",
    "    reported with a printed message and skipped.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: if creating the folder or copying a file fails.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        logs_dir = os.path.join(destination_folder, \"logs\")\n",
    "        os.makedirs(logs_dir, exist_ok=True)\n",
    "\n",
    "        for source_path in log_files:\n",
    "            if not os.path.exists(source_path):\n",
    "                print(f\"Log file not found: {source_path}\")\n",
    "                continue\n",
    "            shutil.copy2(source_path, logs_dir)\n",
    "    except Exception as e:\n",
    "        raise RuntimeError(f\"Cannot copy logs to output. Reason: {e}\")\n",
    "\n",
    "# Resolve the tool's output folder. This is deliberately not wrapped in try/except:\n",
    "# every later cell needs output_folder / jar_output_folder, so a failure must stop the\n",
    "# run here instead of surfacing later as a NameError or reusing values from an earlier run.\n",
    "output_folder, log_file_location = extract_file_info(CONSOLE_OUTPUT_PATH, OUTPUT_PATH)\n",
    "jar_output_folder = os.path.join(output_folder, \"rapids_4_spark_qualification_output\")\n",
    "print(f\"Output folder detected {output_folder}\")\n",
    "\n",
    "# Copying the logs is best-effort: report a failure but keep the results usable.\n",
    "try:\n",
    "    copy_logs(output_folder, log_file_location, CONSOLE_OUTPUT_PATH, CONSOLE_ERROR_PATH)\n",
    "    print(f\"Logs successfully copied to {output_folder}\")\n",
    "except Exception as e:\n",
    "    print(e)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "8c65adcd-a933-482e-a50b-d40fa8f50e16",
     "showTitle": true,
     "title": "Download Output"
    },
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "import shutil\n",
    "import os\n",
    "import re\n",
    "\n",
    "# Working directory at the time this cell runs; base for the local copy of the zipped output.\n",
    "current_working_directory = os.getcwd()\n",
    "\n",
    "def create_destination_folders(folder_name):\n",
    "    \"\"\"Create `folder_name` as given, plus a folder of the same name under /dbfs/FileStore/.\n",
    "\n",
    "    Returns the path of the folder under /dbfs/FileStore/.\n",
    "    \"\"\"\n",
    "    filestore_folder = os.path.join(\"/dbfs/FileStore/\", folder_name)\n",
    "    for folder in (folder_name, filestore_folder):\n",
    "        os.makedirs(folder, exist_ok=True)\n",
    "    return filestore_folder\n",
    "\n",
    "def create_download_link(source_folder, destination_folder_name):\n",
    "    \"\"\"Zip `source_folder` and render a download button for the archive.\n",
    "\n",
    "    The archive is named after the last path component of `source_folder`. It is\n",
    "    created in the process's current directory, copied into the folder returned by\n",
    "    create_destination_folders(destination_folder_name), and then moved to\n",
    "    `<current_working_directory>/<destination_folder_name>/`.\n",
    "\n",
    "    On success an HTML button linking to `/files/<destination_folder_name>/<zip name>`\n",
    "    is shown with displayHTML. Any exception raised while creating folders, archiving\n",
    "    or copying is caught and shown as an HTML error message instead of being raised.\n",
    "    \"\"\"\n",
    "    folder_to_compress = os.path.basename(source_folder)\n",
    "    zip_file_name = folder_to_compress + '.zip'\n",
    "    local_zip_file_path = os.path.join(current_working_directory, destination_folder_name, zip_file_name)\n",
    "    # NOTE(review): the link below assumes files under /dbfs/FileStore/ are served at /files/ - confirm for the target workspace.\n",
    "    download_folder_path = os.path.join(destination_folder_name, zip_file_name)\n",
    "    try:\n",
    "        base_download_folder_path = create_destination_folders(destination_folder_name)\n",
    "        # The archive base name has no directory part, so the zip is written to the current directory.\n",
    "        shutil.make_archive(folder_to_compress, 'zip', source_folder)\n",
    "        shutil.copy2(zip_file_name, base_download_folder_path)\n",
    "        # Remove an archive left at the destination before moving the new one into place.\n",
    "        if os.path.exists(local_zip_file_path):\n",
    "            os.remove(local_zip_file_path)\n",
    "        shutil.move(zip_file_name, local_zip_file_path)\n",
    "    \n",
    "        # Doubled braces in the CSS below are literal braces inside the f-string.\n",
    "        download_button_html = f\"\"\"\n",
    "        <style>\n",
    "            .download-btn {{\n",
    "                display: inline-block;\n",
    "                padding: 10px 20px;\n",
    "                font-size: 16px;\n",
    "                color: white;\n",
    "                background-color: #4CAF50;\n",
    "                text-align: center;\n",
    "                text-decoration: none;\n",
    "                border-radius: 5px;\n",
    "                border: none;\n",
    "                cursor: pointer;\n",
    "                margin: 15px auto;\n",
    "            }}\n",
    "            .download-btn:hover {{\n",
    "                background-color: #45a049;\n",
    "            }}\n",
    "            .button-container {{\n",
    "                display: flex;\n",
    "                justify-content: center;\n",
    "                align-items: center;\n",
    "            }}\n",
    "        </style>\n",
    "        \n",
    "        <div style=\"color: #444; font-size: 14px; text-align: center; margin: 10px;\">\n",
    "            Zipped output file created at {local_zip_file_path}\n",
    "        </div>\n",
    "        <div class='button-container'>\n",
    "            <a href='/files/{download_folder_path}' class='download-btn'>Download Output</a>\n",
    "        </div>\n",
    "        \"\"\"\n",
    "        displayHTML(download_button_html)\n",
    "    except Exception as e:\n",
    "        error_message_html = f\"\"\"\n",
    "        <div style=\"color: red; text-align: center; margin: 20px;\">\n",
    "            <strong>Error:</strong> Cannot create download link for {source_folder}. Reason: {e}\n",
    "        </div>\n",
    "        \"\"\"\n",
    "        displayHTML(error_message_html)\n",
    "\n",
    "# Folder name used both locally and under /dbfs/FileStore/ for the zipped output.\n",
    "destination_folder_name = \"Tools_Output\"\n",
    "create_download_link(source_folder=output_folder, destination_folder_name=destination_folder_name)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "bbe50fde-0bd6-4281-95fd-6a1ec6f17ab2",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "\n",
    "## Summary Output\n",
    "\n",
    "The report provides a comprehensive overview of the entire application execution and its estimated speedup, including unsupported operators and non-SQL operations. By default, the applications and queries are sorted in descending order based on the following fields:\n",
    "\n",
    "- Estimated GPU Speedup Category\n",
    "- Estimated GPU Speedup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "b8bca4a6-16d8-4b60-ba7b-9aff64bdcaa1",
     "showTitle": true,
     "title": "qualification_summary.csv"
    },
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "summary_csv_path = os.path.join(output_folder, \"qualification_summary.csv\")\n",
    "summary_output = (\n",
    "    pd.read_csv(summary_csv_path)\n",
    "    # errors=\"ignore\": do not fail when the CSV has no unnamed index column.\n",
    "    .drop(columns=[\"Unnamed: 0\"], errors=\"ignore\")\n",
    "    # Expose the row number as an explicit 'Index' column.\n",
    "    .rename_axis('Index')\n",
    "    .reset_index()\n",
    ")\n",
    "display(summary_output)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "73b5e0b0-3a96-4cc6-8e6c-840e4b0d9d43",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "\n",
    "## Application Status\n",
    "\n",
    "The report shows the status of each eventlog file that was provided.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "c9ffbfdb-dbb6-4736-b9cb-2ac457cc6714",
     "showTitle": true,
     "title": "rapids_4_spark_qualification_output_status.csv"
    },
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "# Per-eventlog status report written into the tool's output folder.\n",
    "status_csv_path = os.path.join(jar_output_folder, \"rapids_4_spark_qualification_output_status.csv\")\n",
    "status_output = pd.read_csv(status_csv_path)\n",
    "display(status_output)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "09945d39-f9c2-4f4a-8afd-4f309f24f8e0",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "\n",
    "## Metadata for Migration\n",
    "\n",
    "The report shows the metadata of each app, including:\n",
    "- Recommended GPU cluster\n",
    "- File location of full cluster config recommendations\n",
    "- File location of only the GPU-specific config recommendations\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "133cf1bd-33b6-4a62-9ae2-5505717092d1",
     "showTitle": true,
     "title": "app_metadata.json"
    },
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "import json\n",
    "metadata_file = os.path.join(output_folder, \"app_metadata.json\")\n",
    "def camel_to_title(name):\n",
    "    \"\"\"Insert a space at each lowercase-to-uppercase boundary, then title-case the result.\"\"\"\n",
    "    spaced_name = re.sub('([a-z])([A-Z])', r'\\1 \\2', name)\n",
    "    return spaced_name.title()\n",
    "  \n",
    "with open(metadata_file, 'r') as file:\n",
    "    json_data = json.load(file)\n",
    "\n",
    "# Columns shown in the report, in display order.\n",
    "metadata_columns = ['appId', 'appName', 'estimatedGpuSpeedupCategory', 'recommendedGpuCluster', 'fullClusterConfigRecommendations', 'gpuConfigRecommendationBreakdown']\n",
    "\n",
    "if json_data:\n",
    "    df = pd.DataFrame(json_data)\n",
    "    # Surface the recommended cluster from the nested clusterInfo object as its own column.\n",
    "    df['recommendedGpuCluster'] = df['clusterInfo'].apply(lambda x: x['recommendedCluster'])\n",
    "    df = df[metadata_columns]\n",
    "else:\n",
    "    # No application metadata was produced: show an empty table with the expected headers.\n",
    "    df = pd.DataFrame(columns=metadata_columns)\n",
    "df.columns = [camel_to_title(col) for col in df.columns]\n",
    "display(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "6756159b-30ca-407a-ab6b-9c29ced01ea6",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "## Stages Output\n",
    "\n",
    "For each stage used in SQL operations, the Qualification tool generates the following information:\n",
    "\n",
    "1. App ID\n",
    "2. Stage ID\n",
    "3. Average Speedup Factor: The average estimated speed-up of all the operators in the given stage.\n",
    "4. Stage Task Duration: The amount of time spent in tasks of SQL DataFrame operations for the given stage.\n",
    "5. Unsupported Task Duration: The sum of task durations for the unsupported operators. For more details, see [Supported Operators](https://nvidia.github.io/spark-rapids/docs/supported_ops.html).\n",
    "6. Stage Estimated: Indicates if the stage duration had to be estimated (True or False).\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "cdde6177-db5f-434a-995b-776678a64a3a",
     "showTitle": true,
     "title": "rapids_4_spark_qualification_output_stages.csv"
    },
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "# Per-stage report written into the tool's output folder.\n",
    "stages_csv_path = os.path.join(jar_output_folder, \"rapids_4_spark_qualification_output_stages.csv\")\n",
    "stages_output = pd.read_csv(stages_csv_path)\n",
    "display(stages_output)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "4d7ce219-ae75-4a0c-a78c-4e7f25b8cd6f",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "## Execs Output\n",
    "\n",
    "The Qualification tool generates a report of the “Exec” in the “SparkPlan” or “Executor Nodes” along with the estimated acceleration on the GPU. Please refer to the [Supported Operators guide](https://nvidia.github.io/spark-rapids/docs/supported_ops.html) for more details on limitations on UDFs and unsupported operators.\n",
    "\n",
    "1. App ID\n",
    "2. SQL ID\n",
    "3. Exec Name: Example: Filter, HashAggregate\n",
    "4. Expression Name\n",
    "5. Task Speedup Factor: The average acceleration of the operators based on the original CPU duration of the operator divided by the GPU duration. The tool uses historical queries and benchmarks to estimate a speed-up at an individual operator level to calculate how much a specific operator would accelerate on GPU.\n",
    "6. Exec Duration: Wall-clock time measured from when the operator starts until it is completed.\n",
    "7. SQL Node ID\n",
    "8. Exec Is Supported: Indicates whether the Exec is supported by RAPIDS. Refer to the Supported Operators section for details.\n",
    "9. Exec Stages: An array of stage IDs.\n",
    "10. Exec Children\n",
    "11. Exec Children Node IDs\n",
    "12. Exec Should Remove: Indicates whether the Op is removed from the migrated plan.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "998b0c51-0cb6-408e-a01a-d1f5b1a61e1f",
     "showTitle": true,
     "title": "rapids_4_spark_qualification_output_execs.csv"
    },
    "jupyter": {
     "source_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "# Per-exec report written into the tool's output folder.\n",
    "execs_csv_path = os.path.join(jar_output_folder, \"rapids_4_spark_qualification_output_execs.csv\")\n",
    "execs_output = pd.read_csv(execs_csv_path)\n",
    "display(execs_output)"
   ]
  }
 ],
 "metadata": {
  "application/vnd.databricks.v1+notebook": {
   "dashboards": [
    {
     "elements": [],
     "globalVars": {},
     "guid": "",
     "layoutOption": {
      "grid": true,
      "stack": true
     },
     "nuid": "91c1bfb2-695a-4e5c-8a25-848a433108dc",
     "origId": 2173122769183715,
     "title": "Executive View",
     "version": "DashboardViewV1",
     "width": 1600
    },
    {
     "elements": [],
     "globalVars": {},
     "guid": "",
     "layoutOption": {
      "grid": true,
      "stack": true
     },
     "nuid": "62243296-4562-4f06-90ac-d7a609f19c16",
     "origId": 2173122769183716,
     "title": "App View",
     "version": "DashboardViewV1",
     "width": 1920
    },
    {
     "elements": [],
     "globalVars": {},
     "guid": "",
     "layoutOption": {
      "grid": true,
      "stack": true
     },
     "nuid": "854f9c75-5977-42aa-b3dd-c680b8331f19",
     "origId": 2173122769183722,
     "title": "Untitled",
     "version": "DashboardViewV1",
     "width": 1024
    }
   ],
   "environmentMetadata": null,
   "language": "python",
   "notebookMetadata": {
    "mostRecentlyExecutedCommandWithImplicitDF": {
     "commandId": 2173122769183704,
     "dataframes": [
      "_sqldf"
     ]
    },
    "pythonIndentUnit": 2,
    "widgetLayout": [
     {
      "breakBefore": false,
      "name": "Eventlog Path",
      "width": 778
     },
     {
      "breakBefore": false,
      "name": "Output Path",
      "width": 302
     }
    ]
   },
   "notebookName": "[RAPIDS Accelerator for Apache Spark] Qualification Tool Notebook Template",
   "widgets": {
    "Eventlog Path": {
     "currentValue": "/dbfs/user1/qualification_logs",
     "nuid": "1272501d-5ad9-42be-ab62-35768b2fc384",
     "typedWidgetInfo": null,
     "widgetInfo": {
      "widgetType": "text",
      "defaultValue": "/dbfs/user1/qualification_logs",
      "label": "",
      "name": "Eventlog Path",
      "options": {
       "widgetType": "text",
       "autoCreated": false,
       "validationRegex": null
      }
     }
    },
    "Output Path": {
     "currentValue": "/tmp",
     "nuid": "ab7e082c-1ef9-4912-8fd7-51bf985eb9c1",
     "typedWidgetInfo": null,
     "widgetInfo": {
      "widgetType": "text",
      "defaultValue": "/tmp",
      "label": null,
      "name": "Output Path",
      "options": {
       "widgetType": "text",
       "autoCreated": null,
       "validationRegex": null
      }
     }
    }
   }
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
